{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.数据处理\n",
    "只抽取在训练集(train.csv)和测试集(test.csv)中出现过的 event,并为每个 event 建立整数索引。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存数据\n",
    "import cPickle\n",
    "\n",
    "import itertools\n",
    "\n",
    "#处理事件字符串\n",
    "import datetime\n",
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn import metrics\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'433929966': 0,\n",
       " '4234683959': 1,\n",
       " '679447655': 2,\n",
       " '773396729': 3,\n",
       " '3822707736': 4,\n",
       " '1588485860': 5,\n",
       " '1465884852': 6,\n",
       " '1968232351': 7,\n",
       " '453613647': 12240,\n",
       " '2475079669': 8928,\n",
       " '447444721': 10,\n",
       " '2301723344': 11,\n",
       " '4028072926': 12,\n",
       " '1935879458': 13,\n",
       " '3109164411': 14,\n",
       " '833858713': 15,\n",
       " '186124210': 16,\n",
       " '19652138': 8929,\n",
       " '4016804175': 18,\n",
       " '2726295050': 19,\n",
       " '1421433916': 11260,\n",
       " '4266442206': 20,\n",
       " '2299366787': 10502,\n",
       " '2313587840': 21,\n",
       " '733261478': 1459,\n",
       " '1458906695': 23,\n",
       " '2450977738': 24,\n",
       " '915187958': 25,\n",
       " '407288007': 26,\n",
       " '3768548263': 27,\n",
       " '3051088984': 28,\n",
       " '1933276938': 29,\n",
       " '1770070953': 30,\n",
       " '3965935592': 31,\n",
       " '2812330223': 32,\n",
       " '3323980704': 33,\n",
       " '4049041156': 34,\n",
       " '3696272381': 35,\n",
       " '1281252091': 36,\n",
       " '1319656469': 37,\n",
       " '1872379613': 38,\n",
       " '914775174': 39,\n",
       " '230117918': 41,\n",
       " '127163854': 42,\n",
       " '1929917726': 43,\n",
       " '2527738096': 44,\n",
       " '417123290': 45,\n",
       " '861702567': 8935,\n",
       " '1499530182': 46,\n",
       " '332142007': 47,\n",
       " '4126236405': 48,\n",
       " '1880617811': 49,\n",
       " '106978589': 50,\n",
       " '2850509209': 51,\n",
       " '2507275690': 52,\n",
       " '3770866498': 53,\n",
       " '3349548561': 54,\n",
       " '4096532695': 55,\n",
       " '1607711017': 56,\n",
       " '2043006756': 57,\n",
       " '2606383328': 58,\n",
       " '1977406305': 59,\n",
       " '3172060382': 60,\n",
       " '2646392315': 61,\n",
       " '2912281746': 62,\n",
       " '1907550340': 63,\n",
       " '774093053': 64,\n",
       " '3327557579': 65,\n",
       " '3802650890': 66,\n",
       " '2055922065': 67,\n",
       " '3711909804': 68,\n",
       " '3643346333': 11225,\n",
       " '2896451394': 70,\n",
       " '4201550847': 71,\n",
       " '2474982115': 72,\n",
       " '3805108320': 73,\n",
       " '650745497': 74,\n",
       " '1291141020': 434,\n",
       " '973265540': 76,\n",
       " '806826749': 77,\n",
       " '2840088969': 78,\n",
       " '3700950547': 79,\n",
       " '1000481836': 80,\n",
       " '1959421009': 81,\n",
       " '3217192264': 82,\n",
       " '894829625': 83,\n",
       " '4293596113': 84,\n",
       " '3446267401': 86,\n",
       " '2415873572': 88,\n",
       " '169266244': 89,\n",
       " '2422402842': 90,\n",
       " '4260493070': 11228,\n",
       " '3921048124': 8941,\n",
       " '256821264': 93,\n",
       " '1310511988': 94,\n",
       " '2768392299': 95,\n",
       " '345960905': 96,\n",
       " '265740872': 99,\n",
       " '1880156476': 100,\n",
       " '3179167671': 101,\n",
       " '3111155015': 11230,\n",
       " '2818648337': 102,\n",
       " '1771812765': 103,\n",
       " '2421414250': 9173,\n",
       " '4241501571': 105,\n",
       " '1569316024': 106,\n",
       " '3354141479': 108,\n",
       " '3957104249': 7866,\n",
       " '682667459': 109,\n",
       " '4261753049': 110,\n",
       " '2078647608': 111,\n",
       " '2741182317': 11879,\n",
       " '321356225': 112,\n",
       " '1512828747': 113,\n",
       " '284888222': 12422,\n",
       " '3161734222': 115,\n",
       " '915963444': 117,\n",
       " '2877011901': 118,\n",
       " '3427185275': 11276,\n",
       " '1073720066': 119,\n",
       " '2188481598': 120,\n",
       " '3197423454': 121,\n",
       " '3976610702': 122,\n",
       " '439311946': 123,\n",
       " '2723158684': 124,\n",
       " '2847152365': 126,\n",
       " '2024105424': 127,\n",
       " '3312755383': 129,\n",
       " '4073832558': 130,\n",
       " '2799591418': 7993,\n",
       " '1963825952': 281,\n",
       " '4227274972': 132,\n",
       " '228340462': 133,\n",
       " '2005419859': 11922,\n",
       " '2075203466': 136,\n",
       " '533036818': 137,\n",
       " '861989233': 831,\n",
       " '1722911749': 139,\n",
       " '4235863537': 846,\n",
       " '2186351621': 141,\n",
       " '3273736995': 142,\n",
       " '4083994402': 143,\n",
       " '272875209': 144,\n",
       " '1868735086': 145,\n",
       " '1203574711': 146,\n",
       " '3571352784': 147,\n",
       " '1158452780': 149,\n",
       " '1043841273': 150,\n",
       " '740148951': 151,\n",
       " '2456710844': 152,\n",
       " '234487772': 153,\n",
       " '3038741300': 154,\n",
       " '3099686443': 940,\n",
       " '83427780': 156,\n",
       " '1764893644': 157,\n",
       " '3812153136': 158,\n",
       " '1736406045': 159,\n",
       " '2280892735': 161,\n",
       " '376720958': 162,\n",
       " '1860786983': 164,\n",
       " '3413988347': 165,\n",
       " '8559115': 167,\n",
       " '1438024794': 168,\n",
       " '3507025494': 169,\n",
       " '1467809542': 170,\n",
       " '3759330615': 171,\n",
       " '129394252': 11417,\n",
       " '3819567837': 172,\n",
       " '2899012651': 4475,\n",
       " '317875697': 176,\n",
       " '3523716576': 177,\n",
       " '981244440': 178,\n",
       " '3075319534': 179,\n",
       " '38825896': 180,\n",
       " '2103025461': 181,\n",
       " '1232039292': 182,\n",
       " '4031268561': 183,\n",
       " '2464636842': 184,\n",
       " '2512346948': 185,\n",
       " '1203717384': 186,\n",
       " '3697638490': 87,\n",
       " '390684326': 188,\n",
       " '1827283730': 189,\n",
       " '3594729381': 190,\n",
       " '2042697619': 191,\n",
       " '4092123930': 11506,\n",
       " '3949816728': 193,\n",
       " '4043937476': 194,\n",
       " '1096700405': 12221,\n",
       " '2877314442': 196,\n",
       " '866923839': 197,\n",
       " '1061745506': 198,\n",
       " '655495803': 199,\n",
       " '486447403': 200,\n",
       " '1173365630': 201,\n",
       " '2626284779': 202,\n",
       " '2020531758': 1235,\n",
       " '2795993128': 13297,\n",
       " '3884256656': 204,\n",
       " '244220291': 205,\n",
       " '819605910': 10863,\n",
       " '3057888073': 206,\n",
       " '3729283847': 207,\n",
       " '2764233627': 6854,\n",
       " '789146264': 12895,\n",
       " '1430932461': 208,\n",
       " '3402209713': 209,\n",
       " '675888033': 210,\n",
       " '1915380946': 211,\n",
       " '3534091101': 212,\n",
       " '1824334392': 213,\n",
       " '2284893483': 11082,\n",
       " '3329566428': 215,\n",
       " '2762282302': 216,\n",
       " '2118802492': 217,\n",
       " '3549888444': 218,\n",
       " '1549888368': 219,\n",
       " '3926022702': 222,\n",
       " '1633704318': 223,\n",
       " '3889211600': 225,\n",
       " '978829373': 226,\n",
       " '1374330612': 227,\n",
       " '3996685669': 228,\n",
       " '3400642235': 229,\n",
       " '2696649330': 1468,\n",
       " '1590675258': 231,\n",
       " '984516743': 233,\n",
       " '1758417603': 234,\n",
       " '2235428011': 235,\n",
       " '1282404404': 236,\n",
       " '2185322441': 11538,\n",
       " '975577539': 239,\n",
       " '613668540': 240,\n",
       " '138523229': 241,\n",
       " '1759182938': 1571,\n",
       " '2560867051': 243,\n",
       " '4219250662': 244,\n",
       " '4273898950': 9966,\n",
       " '1873976153': 246,\n",
       " '2867772846': 247,\n",
       " '2363192851': 248,\n",
       " '169283528': 11261,\n",
       " '709512451': 251,\n",
       " '251657645': 252,\n",
       " '1298016856': 253,\n",
       " '3412307751': 254,\n",
       " '3352558065': 255,\n",
       " '513780850': 256,\n",
       " '2472934939': 257,\n",
       " '4215766191': 11869,\n",
       " '1462428678': 258,\n",
       " '2859386475': 259,\n",
       " '811652961': 260,\n",
       " '1429589754': 261,\n",
       " '1427394700': 262,\n",
       " '1417587050': 263,\n",
       " '750506418': 264,\n",
       " '3633741296': 4492,\n",
       " '1810281079': 266,\n",
       " '3167757146': 267,\n",
       " '393416610': 268,\n",
       " '3069899537': 269,\n",
       " '536949855': 271,\n",
       " '2106861684': 272,\n",
       " '3729155908': 273,\n",
       " '1111361846': 274,\n",
       " '3267347122': 275,\n",
       " '371746452': 276,\n",
       " '1888695830': 277,\n",
       " '2246974842': 3000,\n",
       " '1738181791': 279,\n",
       " '3633351483': 4280,\n",
       " '701208294': 13271,\n",
       " '647866667': 283,\n",
       " '3943014604': 11266,\n",
       " '505464566': 285,\n",
       " '2170473823': 11267,\n",
       " '1060440359': 2262,\n",
       " '1739388067': 288,\n",
       " '2232544430': 289,\n",
       " '3041724882': 290,\n",
       " '1985321109': 291,\n",
       " '1471308455': 292,\n",
       " '2272129750': 293,\n",
       " '4010042708': 294,\n",
       " '2134876228': 295,\n",
       " '1993199919': 296,\n",
       " '1783539311': 11096,\n",
       " '2832466526': 297,\n",
       " '3703433044': 298,\n",
       " '2961046023': 299,\n",
       " '2520824112': 11270,\n",
       " '3773409641': 302,\n",
       " '55735397': 304,\n",
       " '2422072300': 305,\n",
       " '2458143482': 306,\n",
       " '1913672917': 307,\n",
       " '2156430059': 308,\n",
       " '2167876034': 4499,\n",
       " '3667110496': 310,\n",
       " '2691509364': 311,\n",
       " '100417525': 312,\n",
       " '1139116596': 313,\n",
       " '377254812': 314,\n",
       " '3801433102': 315,\n",
       " '2157401626': 316,\n",
       " '1711604318': 317,\n",
       " '805963128': 318,\n",
       " '3619266504': 319,\n",
       " '1988291613': 320,\n",
       " '1381535648': 321,\n",
       " '2660205855': 322,\n",
       " '4294096869': 12984,\n",
       " '44862888': 9151,\n",
       " '268203907': 323,\n",
       " '2730018676': 4501,\n",
       " '263279656': 325,\n",
       " '3673014182': 326,\n",
       " '4244406355': 327,\n",
       " '25570068': 2101,\n",
       " '490590226': 329,\n",
       " '1828726073': 330,\n",
       " '2408267303': 331,\n",
       " '3319862596': 332,\n",
       " '171025395': 333,\n",
       " '677713566': 334,\n",
       " '61894853': 335,\n",
       " '1712448138': 336,\n",
       " '2407176743': 337,\n",
       " '3657419629': 338,\n",
       " '429395882': 8987,\n",
       " '2007442218': 339,\n",
       " '1063154747': 340,\n",
       " '1366554501': 341,\n",
       " '3574237758': 342,\n",
       " '1295729468': 343,\n",
       " '3851706723': 344,\n",
       " '1988027954': 345,\n",
       " '3036332464': 346,\n",
       " '3365983510': 347,\n",
       " '2333818729': 2227,\n",
       " '582672945': 349,\n",
       " '3696948957': 350,\n",
       " '1294755971': 351,\n",
       " '4025878315': 2246,\n",
       " '232415600': 353,\n",
       " '1592001933': 354,\n",
       " '1656987460': 355,\n",
       " '1781009055': 356,\n",
       " '2670583521': 357,\n",
       " '4116595591': 358,\n",
       " '1956857726': 7480,\n",
       " '4277799248': 2296,\n",
       " '844299203': 360,\n",
       " '135744766': 361,\n",
       " '615488222': 362,\n",
       " '4242816413': 363,\n",
       " '650456731': 364,\n",
       " '2851625619': 365,\n",
       " '322679569': 366,\n",
       " '2653321720': 367,\n",
       " '4092689670': 368,\n",
       " '274265388': 369,\n",
       " '1309574084': 370,\n",
       " '1218003893': 371,\n",
       " '3935738455': 372,\n",
       " '2796594102': 373,\n",
       " '495818697': 374,\n",
       " '407061424': 375,\n",
       " '1578063348': 9249,\n",
       " '3892571955': 376,\n",
       " '1930033127': 377,\n",
       " '794450376': 379,\n",
       " '2101810388': 12628,\n",
       " '3144013743': 380,\n",
       " '2448320909': 381,\n",
       " '1695480317': 382,\n",
       " '3407311067': 383,\n",
       " '122289878': 384,\n",
       " '2526925550': 270,\n",
       " '2750873665': 386,\n",
       " '3051804335': 387,\n",
       " '3843631351': 388,\n",
       " '3028859957': 6731,\n",
       " '3434513569': 390,\n",
       " '1630243360': 391,\n",
       " '1184535341': 392,\n",
       " '1006903887': 393,\n",
       " '1093614806': 394,\n",
       " '2129718710': 395,\n",
       " '1341096506': 396,\n",
       " '1657604679': 398,\n",
       " '1860511950': 399,\n",
       " '1365361942': 402,\n",
       " '146887909': 403,\n",
       " '3221466050': 404,\n",
       " '2794180407': 405,\n",
       " '450183377': 406,\n",
       " '553840202': 407,\n",
       " '321597084': 408,\n",
       " '679700922': 409,\n",
       " '1889940073': 410,\n",
       " '1840365070': 7837,\n",
       " '3890023938': 412,\n",
       " '3617812300': 413,\n",
       " '1845786756': 11283,\n",
       " '1190152428': 415,\n",
       " '2706390147': 416,\n",
       " '1935315082': 417,\n",
       " '2644800408': 418,\n",
       " '2906696294': 419,\n",
       " '525889178': 420,\n",
       " '1033421355': 421,\n",
       " '506476254': 12332,\n",
       " '4172382349': 422,\n",
       " '3021023405': 423,\n",
       " '4072078077': 424,\n",
       " '4205511631': 425,\n",
       " '1659188708': 4519,\n",
       " '2455869070': 10551,\n",
       " '4070309332': 428,\n",
       " '3610112479': 429,\n",
       " '619666754': 431,\n",
       " '3938234624': 432,\n",
       " '2077865887': 433,\n",
       " '737340986': 435,\n",
       " '4045444993': 11816,\n",
       " '1686028847': 436,\n",
       " '2190223358': 437,\n",
       " '3855173861': 438,\n",
       " '92434189': 2897,\n",
       " '3318022618': 439,\n",
       " '383607907': 440,\n",
       " '4280897548': 1850,\n",
       " '3988717856': 442,\n",
       " '582287969': 443,\n",
       " '3119717504': 444,\n",
       " '704244113': 445,\n",
       " '2996288314': 446,\n",
       " '2414358105': 11291,\n",
       " '536059045': 448,\n",
       " '2588804869': 450,\n",
       " '2409474647': 451,\n",
       " '553125617': 452,\n",
       " '1764172149': 453,\n",
       " '2115776454': 454,\n",
       " '1507489572': 455,\n",
       " '4027073137': 456,\n",
       " '4192978272': 457,\n",
       " '4037816474': 458,\n",
       " '181219102': 6744,\n",
       " '2706955572': 460,\n",
       " '51076977': 461,\n",
       " '512403156': 462,\n",
       " '3865518711': 463,\n",
       " '2368738722': 464,\n",
       " '3441655380': 465,\n",
       " '1819998447': 466,\n",
       " '3531332626': 467,\n",
       " '3980181155': 468,\n",
       " '643979828': 469,\n",
       " '1640004842': 470,\n",
       " '3289739345': 471,\n",
       " '1948173913': 472,\n",
       " '3434354867': 473,\n",
       " '2384254802': 474,\n",
       " '3402377961': 475,\n",
       " '1449747272': 9007,\n",
       " '3183605169': 477,\n",
       " '4156705844': 478,\n",
       " '2928192183': 479,\n",
       " '1077825961': 480,\n",
       " '1187117667': 481,\n",
       " '1527128646': 482,\n",
       " '3501509607': 483,\n",
       " '99226238': 484,\n",
       " '1403176651': 485,\n",
       " '2098456340': 9010,\n",
       " '3021122125': 3073,\n",
       " '1146130134': 488,\n",
       " '2263694418': 489,\n",
       " '3187969599': 490,\n",
       " '410842839': 491,\n",
       " '2828360569': 492,\n",
       " '1807883016': 493,\n",
       " '1179474743': 494,\n",
       " '1099380892': 495,\n",
       " '2971102319': 496,\n",
       " '2944816123': 497,\n",
       " '3268726556': 498,\n",
       " '457253413': 499,\n",
       " '3534827249': 500,\n",
       " '2834364568': 501,\n",
       " '3788504448': 502,\n",
       " '1565011423': 503,\n",
       " '2026724113': 11296,\n",
       " '4202744793': 505,\n",
       " '3500532250': 506,\n",
       " '2568221006': 507,\n",
       " '1800619761': 508,\n",
       " '2180353936': 509,\n",
       " '1315709702': 510,\n",
       " '2559164171': 511,\n",
       " '3219983221': 512,\n",
       " '3286516126': 513,\n",
       " '866903362': 514,\n",
       " '2370228216': 515,\n",
       " '353850493': 516,\n",
       " '3823594890': 517,\n",
       " '2798408743': 518,\n",
       " '1727806427': 519,\n",
       " '326944617': 520,\n",
       " '1237174145': 521,\n",
       " '4000643494': 522,\n",
       " '1245942866': 523,\n",
       " '2671865684': 524,\n",
       " '1955298811': 525,\n",
       " '3509100603': 11328,\n",
       " '1904753074': 526,\n",
       " '2274599788': 527,\n",
       " '974024618': 528,\n",
       " '914240358': 530,\n",
       " '2178315774': 531,\n",
       " '1819771813': 532,\n",
       " '785309813': 533,\n",
       " '3492796375': 534,\n",
       " '519767743': 535,\n",
       " '3075707957': 536,\n",
       " '769827874': 537,\n",
       " '2036167774': 538,\n",
       " '4254934247': 539,\n",
       " '58444357': 540,\n",
       " '1005830738': 541,\n",
       " '2608593001': 542,\n",
       " '1387775604': 543,\n",
       " '1977475739': 544,\n",
       " '2444024386': 545,\n",
       " '3466325304': 547,\n",
       " '3275948662': 548,\n",
       " '2301324542': 549,\n",
       " '3946619910': 550,\n",
       " '787562214': 551,\n",
       " '1329952040': 552,\n",
       " '1351516910': 2120,\n",
       " '67648066': 554,\n",
       " '3211869804': 555,\n",
       " '1296215153': 556,\n",
       " '2704376528': 557,\n",
       " '1726301772': 3495,\n",
       " '3701320707': 559,\n",
       " '3129396339': 560,\n",
       " '1711311187': 3440,\n",
       " '3931717290': 562,\n",
       " '896485186': 563,\n",
       " '1824198418': 564,\n",
       " '680133573': 6756,\n",
       " '526347239': 567,\n",
       " '1407801833': 568,\n",
       " '2084337326': 569,\n",
       " '3391397881': 570,\n",
       " '1551306530': 571,\n",
       " '4276521055': 572,\n",
       " '1439724705': 573,\n",
       " '1700908753': 574,\n",
       " '1783630773': 575,\n",
       " '3104895742': 12907,\n",
       " '1605832718': 576,\n",
       " '2071994542': 577,\n",
       " '468922992': 578,\n",
       " '3105234108': 580,\n",
       " '2883057132': 581,\n",
       " '2132888830': 4542,\n",
       " '1209548026': 583,\n",
       " '1955143671': 584,\n",
       " '1872585098': 585,\n",
       " '1793588465': 586,\n",
       " '3636792221': 587,\n",
       " '2057236313': 6758,\n",
       " '4086500156': 589,\n",
       " '4112668226': 590,\n",
       " '374048503': 591,\n",
       " '830183321': 592,\n",
       " '2808705599': 594,\n",
       " '3722242006': 595,\n",
       " '4141101486': 22,\n",
       " '1261557576': 598,\n",
       " '2367733143': 4543,\n",
       " '328882352': 599,\n",
       " '1281335602': 601,\n",
       " '72291902': 11313,\n",
       " '1064284965': 602,\n",
       " '3022634670': 603,\n",
       " '1953860425': 604,\n",
       " '927693315': 605,\n",
       " '2533071551': 606,\n",
       " '2505738116': 607,\n",
       " '3602218872': 608,\n",
       " '693206007': 609,\n",
       " '663617415': 610,\n",
       " '2539026679': 611,\n",
       " '3631560512': 3810,\n",
       " '3926217735': 613,\n",
       " '2505330132': 614,\n",
       " '1169494077': 11315,\n",
       " '3622325352': 616,\n",
       " '3200581311': 617,\n",
       " '2053603846': 618,\n",
       " '2704232594': 619,\n",
       " '1582270949': 620,\n",
       " '4125204455': 621,\n",
       " '3981836848': 622,\n",
       " '3855347794': 623,\n",
       " '939509575': 624,\n",
       " '3754239356': 625,\n",
       " '1545607295': 626,\n",
       " '1163805498': 627,\n",
       " '3017293541': 629,\n",
       " '2011450028': 630,\n",
       " '416330287': 4549,\n",
       " '818027743': 10522,\n",
       " '3935076467': 632,\n",
       " '432884667': 633,\n",
       " '3341857073': 634,\n",
       " '3113660150': 635,\n",
       " '3705876954': 12258,\n",
       " '4141771222': 9038,\n",
       " '852324847': 637,\n",
       " '2284384411': 3329,\n",
       " '1351006732': 639,\n",
       " '1381165425': 640,\n",
       " '1274017391': 641,\n",
       " '3425996117': 642,\n",
       " '409261694': 12597,\n",
       " '1986416649': 643,\n",
       " '1153175316': 644,\n",
       " '787628032': 645,\n",
       " '2028042509': 646,\n",
       " '3404987729': 647,\n",
       " '3067996970': 648,\n",
       " '263011441': 649,\n",
       " '766696037': 4076,\n",
       " '1883524093': 651,\n",
       " '3034841234': 652,\n",
       " '474449136': 653,\n",
       " '289899674': 654,\n",
       " '3225161223': 655,\n",
       " '788806703': 656,\n",
       " '947107369': 657,\n",
       " '1582951351': 658,\n",
       " '4265093803': 659,\n",
       " '1817434960': 660,\n",
       " '832111597': 661,\n",
       " '2219392559': 662,\n",
       " '2384973677': 663,\n",
       " '2118368309': 664,\n",
       " '3809205963': 665,\n",
       " '3430641796': 666,\n",
       " '3126297081': 667,\n",
       " '1844176380': 668,\n",
       " '4110062919': 669,\n",
       " '2993747793': 670,\n",
       " '177193417': 12575,\n",
       " '878068450': 671,\n",
       " '2424637456': 672,\n",
       " '956438727': 673,\n",
       " '590488063': 674,\n",
       " '1517979198': 12246,\n",
       " '3621854275': 675,\n",
       " '268174971': 676,\n",
       " '4220792229': 677,\n",
       " '293751558': 678,\n",
       " '1801766962': 2324,\n",
       " '3319506135': 680,\n",
       " '322709533': 681,\n",
       " '2368911433': 682,\n",
       " '964120039': 683,\n",
       " '4099353109': 684,\n",
       " '1742102536': 685,\n",
       " '1359556127': 686,\n",
       " '1271755958': 687,\n",
       " '702157839': 9046,\n",
       " '3580442023': 689,\n",
       " '4232519602': 690,\n",
       " '1032588622': 691,\n",
       " '1342847040': 692,\n",
       " '2610561518': 693,\n",
       " '4002881841': 694,\n",
       " '1023712594': 695,\n",
       " '2509552492': 696,\n",
       " '3629737683': 697,\n",
       " '561449801': 698,\n",
       " '2179267582': 700,\n",
       " '1359749784': 701,\n",
       " '2766617713': 702,\n",
       " '2859654056': 703,\n",
       " '845902488': 704,\n",
       " '760916752': 705,\n",
       " '705007381': 706,\n",
       " '4080537914': 4396,\n",
       " '1764411835': 708,\n",
       " '1945597998': 709,\n",
       " '4104785185': 710,\n",
       " '1987345098': 711,\n",
       " '1143203761': 712,\n",
       " '3906291725': 713,\n",
       " '2748187097': 13074,\n",
       " '3171268449': 715,\n",
       " '176859368': 716,\n",
       " '2839635887': 717,\n",
       " '2391976137': 719,\n",
       " '2424074793': 720,\n",
       " '1023138400': 721,\n",
       " '426166781': 722,\n",
       " '249760834': 723,\n",
       " '3222871469': 724,\n",
       " '3834889020': 725,\n",
       " '3482282242': 726,\n",
       " '1255629030': 727,\n",
       " '3501001868': 728,\n",
       " '3833635397': 9068,\n",
       " '2802110996': 729,\n",
       " '1807182727': 730,\n",
       " '1340479010': 731,\n",
       " '172405986': 732,\n",
       " '3548928882': 733,\n",
       " '1062024228': 734,\n",
       " '3950787980': 735,\n",
       " '3635358150': 736,\n",
       " '1389265162': 737,\n",
       " '547383999': 738,\n",
       " '2895849260': 4558,\n",
       " '3656008998': 740,\n",
       " '2951987246': 9057,\n",
       " '3658165839': 742,\n",
       " '4188214514': 743,\n",
       " '3086836700': 5413,\n",
       " '3818612808': 5759,\n",
       " '3008408180': 746,\n",
       " '1572089933': 747,\n",
       " '393162138': 748,\n",
       " '4051353547': 749,\n",
       " '1658768128': 750,\n",
       " '1506378274': 752,\n",
       " '743289246': 753,\n",
       " '1596190412': 754,\n",
       " '471488113': 755,\n",
       " '1327533901': 756,\n",
       " '3503174392': 4569,\n",
       " '2806794871': 758,\n",
       " '2893435883': 759,\n",
       " '35091389': 761,\n",
       " '2184211588': 762,\n",
       " '610221734': 764,\n",
       " '3284750825': 765,\n",
       " '3326401128': 766,\n",
       " '885971279': 767,\n",
       " '2692070381': 768,\n",
       " '3590807274': 4705,\n",
       " '3479738412': 770,\n",
       " '899213418': 771,\n",
       " '663120398': 772,\n",
       " '1764159783': 773,\n",
       " '1620415785': 774,\n",
       " '3579174462': 775,\n",
       " '1079899197': 776,\n",
       " '2994155492': 7346,\n",
       " '3700378878': 779,\n",
       " '2567308538': 780,\n",
       " '1012343640': 781,\n",
       " '705138890': 782,\n",
       " '1142967652': 783,\n",
       " '2943640347': 784,\n",
       " '363024307': 1163,\n",
       " '1935724864': 11442,\n",
       " '2668685763': 7885,\n",
       " '3803531290': 786,\n",
       " '205658920': 787,\n",
       " '3436633625': 788,\n",
       " '3476188186': 9410,\n",
       " '679023125': 789,\n",
       " '2554361988': 790,\n",
       " '3250536690': 791,\n",
       " '2062625787': 792,\n",
       " '937839032': 4873,\n",
       " '816912378': 795,\n",
       " '2460957668': 796,\n",
       " '3632072502': 797,\n",
       " '2153348068': 798,\n",
       " '3304261393': 799,\n",
       " '3192664309': 12269,\n",
       " '3847927617': 800,\n",
       " '3671450770': 803,\n",
       " '3123256869': 804,\n",
       " '940376988': 805,\n",
       " '1430458652': 806,\n",
       " '667664881': 807,\n",
       " '2925109008': 808,\n",
       " '688975255': 809,\n",
       " '2963144429': 810,\n",
       " '1036961928': 811,\n",
       " '1395200630': 812,\n",
       " '951253136': 813,\n",
       " '719050114': 8888,\n",
       " '3926229879': 815,\n",
       " '1122283794': 816,\n",
       " '3628957138': 817,\n",
       " '1850184598': 818,\n",
       " '3161434996': 819,\n",
       " '4202927804': 5094,\n",
       " '3547653054': 821,\n",
       " '1274527630': 822,\n",
       " '61104529': 823,\n",
       " '2840642149': 824,\n",
       " '2936735895': 11346,\n",
       " '3155836085': 825,\n",
       " '2063976566': 826,\n",
       " '4095724028': 9074,\n",
       " '3330051251': 828,\n",
       " '872878580': 829,\n",
       " '312581825': 830,\n",
       " '137465175': 138,\n",
       " '1120182267': 833,\n",
       " '3863651111': 834,\n",
       " '1486919469': 835,\n",
       " '4228109405': 836,\n",
       " '1960789592': 837,\n",
       " '1547325648': 2905,\n",
       " '2688159351': 841,\n",
       " '3369636425': 842,\n",
       " '87840033': 844,\n",
       " '2073339412': 11351,\n",
       " '2166119628': 140,\n",
       " '2454980175': 847,\n",
       " '300126317': 848,\n",
       " '578037803': 849,\n",
       " '1401314683': 5281,\n",
       " '646715160': 851,\n",
       " '4266815563': 852,\n",
       " '721082024': 854,\n",
       " '1294433853': 855,\n",
       " '2254075965': 856,\n",
       " '256505275': 857,\n",
       " '486982721': 11355,\n",
       " '904937903': 11356,\n",
       " '468069570': 859,\n",
       " '3191533412': 860,\n",
       " '3321517547': 861,\n",
       " '626503343': 863,\n",
       " '1400053177': 864,\n",
       " '1170338316': 865,\n",
       " '1385126312': 866,\n",
       " '3429958607': 868,\n",
       " '2137154383': 869,\n",
       " '69856756': 5415,\n",
       " '1454534917': 871,\n",
       " '744858144': 872,\n",
       " '1757362667': 873,\n",
       " '2976928477': 874,\n",
       " '3453997003': 875,\n",
       " '2007279414': 876,\n",
       " '2304176660': 6811,\n",
       " '1020530176': 878,\n",
       " '3174177804': 879,\n",
       " '818142116': 5482,\n",
       " '181332418': 881,\n",
       " '536196786': 882,\n",
       " '493625429': 883,\n",
       " '3054051090': 884,\n",
       " '2269626530': 885,\n",
       " '2159444507': 886,\n",
       " '974085606': 887,\n",
       " '1396446223': 889,\n",
       " '1310931838': 890,\n",
       " '1669367540': 891,\n",
       " '2758697837': 892,\n",
       " '726662409': 893,\n",
       " '3432819058': 894,\n",
       " '2279381833': 895,\n",
       " '4004100709': 896,\n",
       " '3684919275': 897,\n",
       " '1769487166': 898,\n",
       " '815715465': 899,\n",
       " '2088284484': 900,\n",
       " '1554642353': 901,\n",
       " '3681999765': 902,\n",
       " '1812117472': 903,\n",
       " '64442250': 904,\n",
       " '692075109': 905,\n",
       " '3080242563': 906,\n",
       " '4189303160': 908,\n",
       " '1214605379': 909,\n",
       " '44607617': 910,\n",
       " '1696929787': 911,\n",
       " '1506510838': 912,\n",
       " '317758728': 914,\n",
       " '2505215665': 915,\n",
       " '54535450': 916,\n",
       " '1880608957': 917,\n",
       " '4181300264': 918,\n",
       " '1468984003': 9090,\n",
       " '2490438703': 920,\n",
       " '2021591040': 921,\n",
       " '4215173213': 922,\n",
       " '4100237668': 923,\n",
       " '3887323073': 924,\n",
       " '1951237429': 925,\n",
       " '3129233779': 926,\n",
       " '3080864770': 927,\n",
       " '1282392038': 928,\n",
       " '2491530958': 929,\n",
       " '3765974734': 930,\n",
       " '2518430453': 931,\n",
       " '3406035843': 932,\n",
       " '604019700': 933,\n",
       " '1266459653': 9999,\n",
       " '2213514822': 934,\n",
       " '2714036762': 936,\n",
       " '3045891165': 937,\n",
       " '1287884858': 939,\n",
       " '1177314523': 155,\n",
       " '565086586': 941,\n",
       " '1386545389': 942,\n",
       " '887203538': 944,\n",
       " '3864658857': 945,\n",
       " '3794166180': 947,\n",
       " '4267881656': 948,\n",
       " '335083853': 949,\n",
       " '2038842201': 950,\n",
       " '1720077487': 951,\n",
       " '2577118609': 4605,\n",
       " '3097220122': 953,\n",
       " '1701917923': 5908,\n",
       " '3480369605': 3883,\n",
       " '1633263987': 956,\n",
       " '228724478': 9479,\n",
       " '2253311961': 957,\n",
       " '1704192379': 958,\n",
       " '3235943764': 959,\n",
       " '681905393': 4607,\n",
       " '1256540913': 961,\n",
       " '361295277': 962,\n",
       " '682567291': 963,\n",
       " '3949409640': 964,\n",
       " '1702401336': 965,\n",
       " '3237640011': 966,\n",
       " '1602227394': 967,\n",
       " '383563223': 968,\n",
       " '252821798': 969,\n",
       " '1355891623': 970,\n",
       " '2393885244': 971,\n",
       " '943870191': 972,\n",
       " '846069276': 973,\n",
       " '3622885988': 4975,\n",
       " '2363524424': 974,\n",
       " '1044854627': 975,\n",
       " '1565715575': 976,\n",
       " '110357109': 977,\n",
       " '1380051674': 978,\n",
       " '2780690728': 979,\n",
       " '396698671': 980,\n",
       " '1396454425': 981,\n",
       " '733567972': 982,\n",
       " '1272961219': 983,\n",
       " '316055690': 984,\n",
       " '1118969585': 986,\n",
       " '1855529308': 987,\n",
       " '613851355': 988,\n",
       " '1516820536': 989,\n",
       " '480523741': 991,\n",
       " '939563341': 992,\n",
       " '2145223158': 993,\n",
       " '3655295959': 994,\n",
       " '2452291959': 995,\n",
       " '791187416': 996,\n",
       " '3039309993': 997,\n",
       " '2157199483': 998,\n",
       " '1000018501': 999,\n",
       " '1092379167': 1000,\n",
       " '1967012522': 1001,\n",
       " '2225155651': 1002,\n",
       " '1217440720': 1003,\n",
       " '2936461848': 1004,\n",
       " '1556853759': 1005,\n",
       " '3276095034': 1006,\n",
       " '1829766016': 1007,\n",
       " '2520855981': 1008,\n",
       " '2992424940': 1009,\n",
       " '1082833698': 1010,\n",
       " '1711030046': 1011,\n",
       " '3928351581': 1012,\n",
       " '2829792984': 1013,\n",
       " '843676515': 1014,\n",
       " '3679301219': 1015,\n",
       " '652541886': 4028,\n",
       " '1512723504': 1017,\n",
       " '3173314627': 1018,\n",
       " '1343845939': 1019,\n",
       " '3442103541': 1020,\n",
       " '2637341381': 1021,\n",
       " '3175320123': 1022,\n",
       " ...}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the distinct users and events that appear in train.csv and test.csv.\n",
    "uniqueUsers = set()\n",
    "uniqueEvents = set()\n",
    "\n",
    "for filename in [\"train.csv\", \"test.csv\"]:\n",
    "    # `with` releases the file handle even if parsing a line raises.\n",
    "    with open(filename, 'rb') as f:\n",
    "        f.readline()  # discard the first line (presumably the CSV header)\n",
    "        for line in f:  # one record per line\n",
    "            cols = line.strip().split(\",\")\n",
    "            uniqueUsers.add(cols[0])   # first column: user ID\n",
    "            uniqueEvents.add(cols[1])  # second column: event ID\n",
    "\n",
    "# Map every distinct event ID to an integer index, following set iteration order.\n",
    "eventIndex = {e: i for i, e in enumerate(uniqueEvents)}\n",
    "\n",
    "eventIndex"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2.聚类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a 5x5 DOK sparse matrix and set the entry at row 1, column 1 to 5.\n",
    "# NOTE(review): EventCount1 is not referenced by any later cell in this notebook;\n",
    "# this looks like a scratch check of dok_matrix item assignment - confirm before removing.\n",
    "EventCount1 = ss.dok_matrix((5, 5))\n",
    "EventCount1[1 , 1] = 5\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_uniqueEvents = len(uniqueEvents)\n",
    "# One row per event in uniqueEvents, 101 columns filled from events.csv below.\n",
    "EventCount = ss.dok_matrix((n_uniqueEvents, 101))\n",
    "\n",
    "# `with` guarantees the handle is released once the scan finishes or fails.\n",
    "with open(\"events.csv\", 'rb') as f:\n",
    "    f.readline()  # discard the first line (presumably the CSV header)\n",
    "    for line in f:\n",
    "        cols = line.strip().split(\",\")\n",
    "        # Only events already collected in uniqueEvents get a row.\n",
    "        if cols[0] in uniqueEvents:\n",
    "            i = eventIndex[cols[0]]\n",
    "            # Columns 9..109 of the record are copied into matrix columns 0..100\n",
    "            # (presumably per-event count features - confirm against the file header).\n",
    "            for j in range(9, 110):\n",
    "                EventCount[i, j - 9] = int(cols[j])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "def K_cluster_analysis(K, X_train, random_state=None):\n",
    "    \"\"\"Fit MiniBatchKMeans with K clusters and return its silhouette score.\n",
    "\n",
    "    Despite the ``CH`` naming used by the calling cells, the value computed\n",
    "    here comes from ``metrics.silhouette_score``; it is not the\n",
    "    Calinski-Harabasz index.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    K : int\n",
    "        Number of clusters passed to ``MiniBatchKMeans``.\n",
    "    X_train : matrix of samples\n",
    "        Data used both to fit the model and to score it (the notebook\n",
    "        passes a scipy sparse matrix).\n",
    "    random_state : int, RandomState instance or None, optional\n",
    "        Forwarded to ``MiniBatchKMeans`` so a run can be made repeatable.\n",
    "        The default (None) keeps the previous unseeded behaviour.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The silhouette score of the fitted clustering on ``X_train``.\n",
    "    \"\"\"\n",
    "    start = time.time()\n",
    "\n",
    "    print(\"K-means begin with clusters: {}\".format(K))\n",
    "\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters=K, random_state=random_state)\n",
    "    mb_kmeans.fit(X_train)\n",
    "\n",
    "    # Score the labels the fitted model assigns to its own training data.\n",
    "    labels = mb_kmeans.predict(X_train)\n",
    "    score = metrics.silhouette_score(X_train, labels)\n",
    "\n",
    "    elapsed = int(time.time() - start)\n",
    "    # The label names the metric that is actually computed.\n",
    "    print(\"silhouette score: {}, time elapsed: {}s\".format(score, elapsed))\n",
    "\n",
    "    return score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. 计算各 K 值下的轮廓系数（silhouette score，代码中沿用变量名 CH_scores）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.386654577622, time elaps:14\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 0.221713533015, time elaps:12\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 0.136441647421, time elaps:10\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.192503159161, time elaps:10\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 0.168954320619, time elaps:10\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 0.131741341415, time elaps:9\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 0.101454697124, time elaps:10\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.0946404379525, time elaps:9\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 0.0689150066852, time elaps:10\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 0.0766703278292, time elaps:10\n"
     ]
    }
   ],
   "source": [
    "# Candidate cluster counts: 10, 20, ..., 100.\n",
    "Ks = list(range(10, 101, 10))\n",
    "\n",
    "# Score every candidate K on the event-count matrix; results are in the same order as Ks.\n",
    "CH_scores = []\n",
    "for K in Ks:\n",
    "    CH_scores.append(K_cluster_analysis(K, EventCount))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. 结果显示/分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0xdd52a90>]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xt4VNW9//H3lyCKSrmYqMg1KtJGULQjtUUUr0C1gJi0eDmitQdvVOvlVKxaHvFUKlqprVSl1nuRiqKibUVFrPV4I8hFAZGACJGKUaxUBRT4/v5Ykx9DCGRCJtkzsz+v55kn2Xv2nvlm3H5ms/baa5m7IyIi8dAs6gJERKTpKPRFRGJEoS8iEiMKfRGRGFHoi4jEiEJfRCRGFPoiIjGSVuib2QAzW2xmFWY2agfblZqZm1kiZd3Vyf0Wm1n/TBQtIiI7p3ldG5hZATABOBGoBGaZ2TR3X1hju1bAJcDrKetKgGHAwcB+wPNmdpC7b8rcnyAiIumqM/SB3kCFuy8DMLPJwGBgYY3tbgDGAVemrBsMTHb3DcB7ZlaRfL1Xt/dmhYWF3rVr17T/ABERgdmzZ3/s7kV1bZdO6HcAVqYsVwLfSd3AzA4DOrn702Z2ZY19X6uxb4cdvVnXrl0pLy9PoywREalmZu+ns106bfpWy7r/P2CPmTUDxgNX1HfflNcYYWblZlZeVVWVRkkiIrIz0gn9SqBTynJHYFXKciugB/CimS0HjgSmJS/m1rUvAO4+0d0T7p4oKqrzXyciIrKT0gn9WUA3Mys2sxaEC7PTqp9098/cvdDdu7p7V0JzziB3L09uN8zMdjWzYqAb8EbG/woREUlLnW367r7RzEYC04EC4B53X2BmY4Byd5+2g30XmNkjhIu+G4GL1XNHRCQ6lm3j6ScSCdeFXBGR+jGz2e6eqGs73ZErIhIjCn0RkRjJm9BfswbGjIH586OuREQke+VN6DdrBv/7v/DQQ1FXIiKSvfIm9Nu0gRNPhClTIMuuTYuIZI28CX2A0lJYvhzefDPqSkREslNehf7gwdC8OTz6aNSViIhkp7wK/Xbt4Pjj1cQjIrI9eRX6EJp4li6FefOirkREJPvkXegPGQIFBWriERGpTd6FfmEhHHusmnhERGqTd6EPoYnn3Xfh7bejrkREJLvkZeifemq4WWvKlKgrERHJLnkZ+nvvDccco3Z9EZGa8jL0ITTxLFoEC2tO3y4iEmN5G/pDh4KZmnhERFLlbejvuy/07asmHhGRVHkb+hCaeN5+G955J+pKRESyQ16H/mmnhZ862xcRCfI69PfbD/r0UeiLiFTL69CH0MQzbx4sWRJ1JSIi0cv70FcTj4jIFmmFvpkNMLPFZlZhZqNqef4CM3vLzOaa2ctmVpJc39XM1iXXzzWzOzP9B9SlUyc48kiFvogIpBH6ZlYATAAGAiXA6dWhnmKSu/d0917AOODWlOeWunuv5OOCTBVeH2VlYTatZcuieHcRkeyRzpl+b6DC3Ze5+1fAZGBw6gbuvjZlcQ8gq8a3VBOPiEiQTuh3AFamLFcm123FzC42s6WEM/1LUp4qNrM5ZvYPM+vboGp3UpcucMQRCn0RkXRC32pZt82ZvLtPcPcDgKuAa5Or/wV0dvfDgMuBSWb2jW3ewGyEmZWbWXlVVVX61ddDWRnMmhUmThcRiat0Qr8S6JSy3BFYtYPtJwNDANx9g7t/kvx9NrAUOKjmDu4+0d0T7p4oKipKt/Z6qW7ieeyxRnl5EZGckE7ozwK6mVmxmbUAhgHTUjcws24piycDS5Lri5IXgjGz/YFuQCSXU/ffHw4/XE08IhJvdYa+u28ERgLTgUXAI+6+wMzGmNmg5GYjzWyBmc0lNOMMT64/GphvZvOAR4EL3H1Nxv+KNJWVwWuvwcqVdW8rIpKPzLNsItlEIuHl5eWN8tpLlsBBB8H48fCznzXKW4iI
RMLMZrt7oq7t8v6O3FTdusGhh2qMfRGJr1iFPoQmnldegQ8+iLoSEZGmF7vQLy0NP6dOjbYOEZEoxC70u3eHnj3VxCMi8RS70Idwtv/yy/Cvf0VdiYhI04pt6LvD449HXYmISNOKZeiXlISHmnhEJG5iGfoQzvZfeglWr466EhGRphPr0N+8WU08IhIvsQ39Hj1CTx6NxSMicRLb0DcLZ/svvgiNNJqziEjWiW3oQwj9TZvgiSeirkREpGnEOvQPPRQOPFBNPCISH7EO/eomnhkz4JNPoq5GRKTxxTr0YUsTz5NPRl2JiEjji33oH344FBeriUdE4iH2oV/dxPP88/Dpp1FXIyLSuGIf+hDG2P/6a5g2re5tRURymUIfSCSgc2c18YhI/lPos6WJ59ln4bPPoq5GRKTxKPSTysrgq6/gqaeirkREpPEo9JN694aOHdXEIyL5La3QN7MBZrbYzCrMbFQtz19gZm+Z2Vwze9nMSlKeuzq532Iz65/J4jOpWTM47TR45hlYuzbqakREGkedoW9mBcAEYCBQApyeGupJk9y9p7v3AsYBtyb3LQGGAQcDA4A/JF8vK5WVwYYN8Ne/Rl2JiEjjSOdMvzdQ4e7L3P0rYDIwOHUDd089N94D8OTvg4HJ7r7B3d8DKpKvl5W++11o315NPCKSv9IJ/Q7AypTlyuS6rZjZxWa2lHCmf0l99s0W1U08f/sbfP551NWIiGReOqFvtazzbVa4T3D3A4CrgGvrs6+ZjTCzcjMrr4p4cPuyMli/PgS/iEi+SSf0K4FOKcsdgVU72H4yMKQ++7r7RHdPuHuiqKgojZIaT58+sM8+mjRdRPJTOqE/C+hmZsVm1oJwYXarAQvMrFvK4snAkuTv04BhZrarmRUD3YA3Gl524yko2NLE88UXUVcjIpJZdYa+u28ERgLTgUXAI+6+wMzGmNmg5GYjzWyBmc0FLgeGJ/ddADwCLASeAS52902N8HdkVGkpfPll6L4pIpJPzH2bJvZIJRIJLy8vj7SGjRthv/3guONg8uRISxERSYuZzXb3RF3b6Y7cWjRvDkOHwtNPw7p1UVcjIpI5Cv3tKC0NbfrTp0ddiYhI5ij0t6NfP9hrL/XiEZH8otDfjubN4dRTw6ib69dHXY2ISGYo9HegtBT+858wzr6ISD5Q6O/AccdB27Yai0dE8odCfwd22QWGDAlz527YEHU1IiINp9CvQ2lpmELx+eejrkREpOEU+nU44QRo3VpNPCKSHxT6dWjRAgYPhieeCHPoiojkMoV+GkpL4d//hhdeiLoSEZGGUein4aSToFUrNfGISO5T6Kdh111h0CB4/HH4+uuoqxER2XkK/TSVlsKaNfDii1FXIiKy8xT6aerfH/bcU008IpLbFPppatkSTjkFpk4N4+2LiOQihX49lJbCxx/DSy9FXYmIyM5R6NfDwIGw++5q4hGR3KXQr4fdd4eTTw5NPJuyfqZfEZFtKfTrqawMVq+Gl1+OuhIRkfpT6NfTwIHhoq5m1BKRXKTQr6c99wzB/9hjsHlz1NWIiNSPQn8nlJXBhx/CK69EXYmISP2kFfpmNsDMFptZhZmNquX5y81soZnNN7MZZtYl5blNZjY3+ZiWyeKjcvLJYWgGNfGISK6pM/TNrACYAAwESoDTzaykxmZzgIS7HwI8CoxLeW6du/dKPgZlqO5ItWoFAwaoiUdEck86Z/q9gQp3X+buXwGTgcGpG7j7THf/Mrn4GtAxs2Vmn7Iy+OADeP31qCsREUlfOqHfAViZslyZXLc95wF/T1nezczKzew1MxtS2w5mNiK5TXlVVVUaJUXvlFPCBCtq4hGRXJJO6Fst67zWDc3OAhLAzSmrO7t7AjgD+K2ZHbDNi7lPdPeEuyeKiorSKCl6rVuHcfYffRS81k9DRCT7pBP6lUCnlOWOwKqaG5nZCcA1wCB331C93t1XJX8uA14EDmtAvVmlrAxWroRZs6KuREQkPemE/iyg
m5kVm1kLYBiwVS8cMzsMuIsQ+B+lrG9rZrsmfy8E+gALM1V81AYNgl12UROPiOSOOkPf3TcCI4HpwCLgEXdfYGZjzKy6N87NwJ7AlBpdM78FlJvZPGAm8Gt3z5vQb9MGTjxRTTwikjvMsyytEomEl5eXR11G2u69F37849DEk0hEXY2IxJWZzU5eP90h3ZHbQIMHQ/PmGm5ZRHKDQr+B2rWD449XE4+I5AaFfgaUlsLSpTB3btSViIjsmEI/A4YMgYICNfGISPZT6GdAYSEce2zouqkmHhHJZgr9DCkthSVL4K23oq5ERGT7FPoZcuqp0KyZmnhEJLsp9DNk773hmGPUxCMi2U2hn0GlpfDOO7Awb+45FpF8o9DPoKFDwUxNPCKSvRT6GbTvvtC3rwZgE5HspdDPsNJSWLAA5syJuhIRkW0p9DPszDNDv/2LLoJNm6KuRkRkawr9DGvXDsaPh9degzvuiLoaEZGtKfQbwZlnQv/+cPXVYWYtEZFsodBvBGbhLH/z5tDMo377IpItFPqNpLgYbrgBnn5avXlEJHso9BvRJZfAt78dfn76adTViIgo9BtV8+Zw993w8cfwP/8TdTUiIgr9RterF1xxBfzpTzBzZtTViEjcKfSbwOjRcMABMGIErFsXdTUiEmcK/Saw++5w111QUREu7oqIRCWt0DezAWa22MwqzGxULc9fbmYLzWy+mc0wsy4pzw03syXJx/BMFp9Ljj8ezjkHbr4Z5s+PuhoRias6Q9/MCoAJwECgBDjdzEpqbDYHSLj7IcCjwLjkvu2A0cB3gN7AaDNrm7nyc8stt0DbtvCTn2iIBhGJRjpn+r2BCndf5u5fAZOBwakbuPtMd/8yufga0DH5e3/gOXdf4+6fAs8BAzJTeu7Zay+47TaYNQtuvz3qakQkjtIJ/Q5A6mAClcl123Me8Pf67GtmI8ys3MzKq6qq0igpdw0bBt//PlxzDbz/ftTViEjcpBP6Vsu6WgcWMLOzgARwc332dfeJ7p5w90RRUVEaJeWu6iEaAC68UEM0iEjTSif0K4FOKcsdgVU1NzKzE4BrgEHuvqE++8ZN587wq1/B3/8OkydHXY2IxEk6oT8L6GZmxWbWAhgGTEvdwMwOA+4iBP5HKU9NB04ys7bJC7gnJdfF3siR0Ls3XHopfPJJ1NWISFzUGfruvhEYSQjrRcAj7r7AzMaY2aDkZjcDewJTzGyumU1L7rsGuIHwxTELGJNcF3sFBfDHP4Yxea68MupqGs/994eb0t58M+pKRATAPMsalROJhJeXl0ddRpP5xS9g7Fh47jk44YSoq8msP/0pdE81C9cujjoq/MtmyJAwLpGIZI6ZzXb3RF3b6Y7ciF13HXTrBuefD19+Wff2ueKhh+C//xsGDIAPP4Rbb4UPPoCysjAkxbhxsEb/5hNpcgr9iLVsCRMnwrJlcP31UVeTGVOmwPDhcOyxMHUq7L03XHYZLFkCTzwRQv+qq6Bjx/Blt2BB1BWLxIdCPwv06wfnnQe/+Q3MmRN1NQ3z5JNwxhnQpw9Mmxa+1KoVFMDgwfDCCzBvXtjugQegRw848cQw4czmzdHVLhIHCv0scfPNUFgYmkQ2boy6mp3zt7+F5ptEAv76V9hjj+1ve8ghYa6BlStD99VFi+AHP4Du3eF3v4O1a5uubpE4UehnibZtQ9jNnh1+5poZM2DoUOjZM9x/0KpVevsVFoaL2e+9F+5ZKCoKF3s7doSf/QyWLm3cukXiRqGfRcrKwtnuddeFEMwVL70U6j7oIHj2WWjTpv6vscsu8KMfwSuvwBtvwKBB8Ic/hIvcgwaFL5Us62gmkpMU+lnEDCZMgGbN4IILciPkXn0VTj4ZunSB558Pg8o11BFHhN4/778P114Lr70WurMecki4tyGfejmJNDWFfpbp1Cn023/2Wfjzn6OuZsfKy0OXzH33DWfie++d2ddv3x7GjIEVK+Dee0Pf/hEjwmc0alS4HiAi9aOb
s7LQpk3hRqaKinCBs7Aw6oq2NW9e6JLZunVo3unUqe59Gsod/vnPMDz1E0+EfxkNHRquAXzve2FZJK50c1YOqx6i4bPP4PLLo65mWwsXhi6We+wRul82ReBDCPWjj4bHHgsXeC+7LNzJfNRRoUnowQdhw4a6X0ckzhT6WapHj9CE8eCDMD2Lhqh7990w9WNBQQj84uJo6ujaNXRzrawMQ1V/+SWcfXa4tnD99bB6dTR1iWQ7Ne9ksfXroVevcPb69ts77vfeFJYtC2faX30FL74IJTUnzYyQezjrv+22cL9AixZhwppLL4XDD4+6OpHGp+adPLDbbqGZZ/lyGD062lpWrAhn+OvWhV462RT4EJp+Tjop3BS2eHG44Dt1Knz729C3bxgaIldvehPJJIV+luvbNwTY+PHhxq0orFoVAv/TT0OvokMOiaaOdB10EPz+96Hpp3qgtx/+MJzxz50bdXUi0VLo54CbboJ99gnDFH/9ddO+9+rVIfA//BCeeSacOeeK1q23DPQ2ZQpUVYWJa268UWf9El8K/RzQpg3cfns4Sx0/vune9+OPw01RK1aEdvIjj2y6986kggIoLQ3XRYYODZPS9+0bLkqLxI1CP0cMHRomHxk9umnGo/n3v0Mb+ZIlYbTMvn0b/z0b2157hfF9Hn44tPv36hW+TDWyp8SJQj+H3H576JVy/vmNO0TD2rXhTtsFC+Dxx0PzTj4ZNiyc9ffrBz/9afhy0929EhcK/RzSoQP8+tdhyIMHHmic9/j88zCWzuzZ8MgjMHBg47xP1PbbL/T0mTgRXn893Bdx//25Md6RSEMo9HPM+eeHCUouvxw++iizr71uXRjR8pVXYNKkMOFJPjML8xfMmweHHgrnnBOa0TL9uYpkE4V+jmnWLJydfv556JmSKRs2wKmnhpuu7r8/DPMcF/vvDzNnwi23hLkAevQIzVoi+Uihn4NKSsLEI5MmhZBqqK++CiE/fXq4Geyssxr+mrmmoACuuCI0a3XqFM74zz47XNAWySdphb6ZDTCzxWZWYWajann+aDN708w2mllpjec2mdnc5GNapgqPu1Gj4FvfCuPuf/75zr/Oxo1hrtqnngpj+Z93XuZqzEUHHxzG7//lL8OXas+eYXgHkXxRZ+ibWQEwARgIlACnm1nNm/BXAOcAk2p5iXXu3iv5GNTAeiVp113DWfmKFWGmrZ2xaRMMHx5Grbz1VrjooszWmKt22SUM2vbqq7DnnqF3z8iR8MUXUVcm0nDpnOn3BircfZm7fwVMBra6xOfuy919PqAez02oTx+48MIwp+6sWfXbd/PmcBFz0qQwaUsmrw/kiyOOgDffDJ/NH/4Q+vW/+mrUVYk0TDqh3wFI7cVcmVyXrt3MrNzMXjOzIbVtYGYjktuUV1VV1eOlZezYMMNUfYZocIeLLw6zUY0eHZqKpHYtW4Z/Bb3wQvh8jzoqXE/RuP2Sq9IJ/drmI6pPb+bOyeE+zwB+a2YHbPNi7hPdPeHuiaKionq8tLRuHdri588PvU/q4h7OXO+8E666KvrRO3NFv37hMz733PBF27t3WBbJNemEfiWQOjdSR2BVum/g7quSP5cBLwKH1aM+ScPgwXDaaaEdesmS7W/nHs7qb7stjDM/dqymGKyPb3wD7r47XPRevRoSifAZavA2ySXphP4soJuZFZtZC2AYkFYvHDNra2a7Jn8vBPoAC3e2WNm+3/8+jL+/oyEarr8exo0L1wHGj1fg76xTTgnDOAwZEpp6jj56x1+2ItmkztB3943ASGA6sAh4xN0XmNkYMxsEYGZHmFklUAbcZWYLkrt/Cyg3s3nATODX7q7QbwTt24dAnzkztNXXNHZsCP1zzw1j+CjwG6awEP7yl3Ah/J13wkXeCRM0eJtkP02XmEc2b4Zjj4W33gqTl++7b1g/fnwYtuGMM8KYPQUF0daZb1atCvc3PPNMGIr6nnuabrJ4kWqaLjGGqodo+PLL0GYP4ezz8stD
m//99yvwG8N++4X5Bu68M3Tp7NkzTGifZedTIoBCP+907w7XXhtGyDzvvHBT0Q9+EJohmjePurr8ZRaup8ybF0L/7LPDF616IEu2UejnoZ//PAwads89YVz8KVPCOPzS+A44IAxad/PNYejmgw+GJ56IuiqRLRT6eahFizBD1NVXw9SpYcgGaToFBXDllWHwto4dw+il55wDn30WdWUiCv28dfDBYQLwli2jriS+evQIg7dddx089FBo9pkxI+qqJO4U+iKNqEULGDMmTEyz++6hd8+Pfxx6/IhEQaEv0gR694Y5c8L1lj//Gbp1C0NgNGRYbJGdodAXaSItW8JNN8GiRaFH1ZgxIfzvvjsMcy3SFBT6Ik1s//3DhfZXX4Xi4jDE9WGHwbPPRl2ZxIFCXyQiRx4J//d/oUvtF19A//6hi+3bb0ddmeQzhb5IhMygtDQMm/Gb38Drr8Ohh8KIEfDhh1FXJ/lIoS+SBXbdNQyXsXQpXHIJ3HcfHHgg3HBDGFZDJFMU+iJZpF27MEDewoWhqeeXvwwXe++7Txd7JTMU+iJZ6MAD4dFH4Z//DHf1nntumLRFN3dJQyn0RbLYUUeFXj4PPwyffhpu7jrllNDtU2RnKPRFslyzZjBsWJis5aabwtl/z55hBrSPPoq6Osk1Cn2RHLHbbuGO3qVLQ+D/8Y+hGWjsWFi3LurqJFco9EVyTGFhmBN5wQI47rgwT2/37mFQN03XKHVR6IvkqO7dw1j9M2dCURH813+FMX7+8Y+oK5NsptAXyXH9+sGsWWGKxo8+CsuDB8PixVFXJtlIoS+SB5o1g7POCkF/443h7P/gg8N0mZqyUVIp9EXySMuWYca0JUvCQG533hku9o4bB+vXR12dZIO0Qt/MBpjZYjOrMLNRtTx/tJm9aWYbzay0xnPDzWxJ8jE8U4WLyPbtsw/ccQfMnw99+8JVV8E3vxn6+7tHXZ1Eqc7QN7MCYAIwECgBTjezkhqbrQDOASbV2LcdMBr4DtAbGG1mbRtetoiko6QEnn4ann8e2rSBM84Io3u+9JJ6+sRV8zS26Q1UuPsyADObDAwGFlZv4O7Lk8/VPIz6A8+5+5rk888BA4CHG1y5iKTt+OPDRO0PPgjXXAPHHBP6/XfpAl271v7YZ58wCqjkl3RCvwOwMmW5knDmno7a9u2Q5r4ikkEFBXDOOVBWFpp5Fi+G5cvDY/Zs+PjjrbdP/VIoLt72S2HvvfWlkIvSCf3a/rOm2yqY1r5mNgIYAdC5c+c0X1pEdsYee8BPfrLt+s8/3/IlUPNRXg6ffLL19i1bbv0vhZpfDEVF+lLIRumEfiXQKWW5I7AqzdevBPrV2PfFmhu5+0RgIkAikdBlJpEI7Lkn9OgRHrX5z3/g/fe3/UJ47z144w1Ys2br7Vu2rL3ZqPrLobBQXwpRSCf0ZwHdzKwY+AAYBpyR5utPB25MuXh7EnB1vasUkci1arXjL4W1a2v/Uli+PMwIVvNLYffdw13FF1wAw4eHiWSk8Zmn0X/LzL4P/BYoAO5x91+Z2Rig3N2nmdkRwONAW2A98KG7H5zc98fAL5Iv9St3v3dH75VIJLy8vHyn/yARyU6pXwrvvRd+vvRSuJ7Qvj1ccUWYJrJVq6grzU1mNtvdE3Vul07oNyWFvkh8uIeJYcaOhRdegLZt4ac/DVNG7rVX1NXllnRDX3fkikhkzMLEMDNmhCagY46BMWOgc2e47DKorIy6wvyj0BeRrNC7Nzz+eBgyurQ0DB+9//5w3nnw7rtRV5c/FPoiklVKSuD++6GiIrTxT5oUhpD44Q9hzpyoq8t9Cn0RyUpdu8Ltt4cLvqNGwfTpcPjhMGBAmDMgyy5H5gyFvohktX32CcNFr1gRLvjOmRPmDOjTB556SuFfXwp9EckJrVuHM/7ly2HCBFi1CgYNgkMOCU1AGzdGXWFuUOiLSE5p2RIuuijMGfDAA2G00DPPDDd63Xmn5g2oi0JfRHLSLruE
eYHfeivMFVxUBBdeGIZ5GDcu3Awm21Loi0hOa9YszAn86qvhBq+ePcOkMV26wLXXZu90kevXh+6pTz4Jt9wShqO49trGf990xt4REcl6ZnDsseFRXh4u+t54I9x6a5g68oorwk1fTWn9eli6NHQ/XbIkPKp/r6zc+iL0XnuFG9Uam4ZhEJG8tWhRaOp56KGwfNZZW6aOzJR162DZsq0DfUfB3q1bmLe45s+2DZxTUGPviIgkrVgRmlDuvjucfZ96aphAPlFnRAbr1m19xp76symDfUcU+iIiNVRVwW23hZu+PvsMTjwxhH+/fluaYrZ3xp6qZrBX/97Ywb4jCn0Rke1YuzZ077z1Vli9OkzoUnO6yMLC2s/Wowz2HVHoi4jUYf16uO++MMJncXH2B/uOKPRFRGJE4+mLiMg2FPoiIjGi0BcRiRGFvohIjCj0RURiRKEvIhIjCn0RkRhR6IuIxEjW3ZxlZlXA+1HX0UCFwMd1bhUf+jy2ps9jC30WW2vI59HF3Yvq2ijrQj8fmFl5OnfGxYU+j63p89hCn8XWmuLzUPOOiEiMKPRFRGJEod84JkZdQJbR57E1fR5b6LPYWqN/HmrTFxGJEZ3pi4jEiEK/gcysk5nNNLNFZrbAzC5Nrm9nZs+Z2ZLkzxybkmHnmVmBmc0xs6eTy8Vm9nrys/iLmbWIusamYmZtzOxRM3sneYx8N+bHxmXJ/0/eNrOHzWy3OB0fZnaPmX1kZm+nrKv1eLDgd2ZWYWbzzezwTNSg0G+4jcAV7v4t4EjgYjMrAUYBM9y9GzAjuRwXlwKLUpZvAsYnP4tPgfMiqSoatwHPuPs3gUMJn0ssjw0z6wBcAiTcvQdQAAwjXsfHfcCAGuu2dzwMBLolHyOAOzJSgbvrkcEH8CRwIrAYaJ9c1x5YHHVtTfT3d0weuMcBTwNGuNmkefL57wLTo66ziT6LbwDvkbx2lrI+rsdGB2Al0A5onjw++sft+AC6Am/XdTwAdwGn17ZdQx46088gM+sKHAa8Duzj7v8CSP7cO7rKmtRvgZ8Dm5PLewH/dveNyeVKwv/8cbA/UAXcm2zuutvM9iCmx4a7fwDcAqwA/gV8BswmvsdHte0dD9VfktUy8tko9DPEzPYEHgO7WXQ4AAABsklEQVR+5u5ro64nCmZ2CvCRu89OXV3LpnHpMtYcOBy4w90PA74gJk05tUm2VQ8GioH9gD0ITRg1xeX4qEuj/L+j0M8AM9uFEPh/dvepydWrzax98vn2wEdR1deE+gCDzGw5MJnQxPNboI2ZNU9u0xFYFU15Ta4SqHT315PLjxK+BOJ4bACcALzn7lXu/jUwFfge8T0+qm3veKgEOqVsl5HPRqHfQGZmwJ+ARe5+a8pT04Dhyd+HE9r685q7X+3uHd29K+EC3QvufiYwEyhNbhaLzwLA3T8EVppZ9+Sq44GFxPDYSFoBHGlmuyf/v6n+PGJ5fKTY3vEwDTg72YvnSOCz6maghtDNWQ1kZkcB/wTeYks79i8I7fqPAJ0JB3uZu6+JpMgImFk/4Ep3P8XM9iec+bcD5gBnufuGKOtrKmbWC7gbaAEsA84lnGzF8tgws+uBHxF6vc0BfkJop47F8WFmDwP9CKNprgZGA09Qy/GQ/GK8ndDb50vgXHcvb3ANCn0RkfhQ846ISIwo9EVEYkShLyISIwp9EZEYUeiLiMSIQl9EJEYU+iIiMaLQFxGJkf8Ha1ilYdOL6iMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xbd2a390>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the score returned by K_cluster_analysis (a silhouette score) for each K.\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(Ks, np.array(CH_scores), 'b-')\n",
    "ax.set_xlabel(\"number of clusters K\")\n",
    "ax.set_ylabel(\"silhouette score\")\n",
    "ax.set_title(\"MiniBatchKMeans clustering quality vs. K\")\n",
    "# plt.show() renders the figure without echoing the Line2D repr as cell output.\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ten plotting colours; not used within this cell.\n",
    "colors = ['b','g','r','k','c','m','y','#e24fff','#524C90','#845868']\n",
    "\n",
    "# Fit a 10-cluster MiniBatchKMeans model on the full event-count matrix.\n",
    "n_clusters = 10\n",
    "mb_kmeans = MiniBatchKMeans(n_clusters=n_clusters).fit(EventCount)\n",
    "\n",
    "# Echo the fitted estimator as the cell output.\n",
    "mb_kmeans"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
